library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1          ✔ purrr   0.2.4     
## ✔ tibble  1.4.1.9000     ✔ dplyr   0.7.4     
## ✔ tidyr   0.7.2          ✔ stringr 1.2.0     
## ✔ readr   1.1.1          ✔ forcats 0.2.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(nycflights13)
# Print the flights data set (made available by the library() calls above).
flights
# Number of rows in the full data set.
nrow(flights)
## [1] 336776

We only want to see American Airlines flights that departed with a delay:

# Keep only flights of carrier "AA" that have a positive departure delay.
american_airways_with_delay <- flights %>%
  filter(carrier == "AA", dep_delay > 0)

# Number of remaining flights per year.
table(american_airways_with_delay$year)
## 
##  2013 
## 10162
# Departure delay (log10 y axis) against time_hour, drawn as faint red points
# with a smoothed trend line on top.
american_airways_with_delay %>%
  ggplot(aes(x = time_hour, y = dep_delay)) +
  geom_point(colour = "red", alpha = 0.1) +
  geom_smooth() +
  scale_y_log10() +
  labs(
    title = "Delays over the year",
    x = "Time",
    y = "Departure Delay in Minutes"
  )
## `geom_smooth()` using method = 'gam'

# Per month: the share of these flights whose departure delay exceeds
# 10 minutes, and the largest departure delay.
american_airways_with_delay %>%
  group_by(month) %>%
  summarise(
    share_of_delayed_fligths = mean(dep_delay > 10),
    maximum_delay = max(dep_delay)
  )
# Per carrier, using only flights with a non-missing departure delay:
# share of flights delayed by more than 10 minutes, largest delay and number
# of flights. Carriers with the highest share come first.
aggregated <- flights %>%
  filter(!is.na(dep_delay)) %>%
  group_by(carrier) %>%
  summarise(
    share_of_delayed_fligths = mean(dep_delay > 10),
    maximum_delay = max(dep_delay),
    number_of_flights = n()
  ) %>%
  arrange(desc(share_of_delayed_fligths))

# Column name stored as a string; the plot below names the column directly.
a <- "number_of_flights"

# Histogram (10 bins) of the per-carrier flight counts.
aggregated %>%
  ggplot(aes(x = number_of_flights)) +
  geom_histogram(bins = 10)

#  table(flights$dep_delay, exclude = NULL)
# Re-applies the condition that american_airways_with_delay was built with,
# this time to the already filtered data.
filter(american_airways_with_delay, carrier == "AA", dep_delay > 0)

There are different types of data in R:

# A numeric vector.
c(1, 2, 3)
## [1] 1 2 3
# Mixing a number with a string: both elements are printed as strings.
c(1, "a")
## [1] "1" "a"
# A list may contain another list as an element.
list(1, list(2))
## [[1]]
## [1] 1
## 
## [[2]]
## [[2]][[1]]
## [1] 2
# The lower-case letters "a" to "z".
letters
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
# A data frame with three columns; the single "" given for `c` is recycled
# to all 10 rows.
data.frame(a = 1:10, b = letters[1:10], c = "")
# Count how often each distinct value occurs.
table(c(1, 2, 1, 1, 2))
## 
## 1 2 
## 3 2

Conditions

# Equal to.
1 == 2
## [1] FALSE
# Greater than.
1 > 2
## [1] FALSE
# Greater than or equal to.
1 >= 2
## [1] FALSE
# Less than.
1 < 2
## [1] TRUE
#' Add one to a value.
#'
#' @param x Value to increment.
#' @return The result of `x + 1`.
add_one <- function(x) x + 1
#' Add two values.
#'
#' @param x First summand.
#' @param y Second summand.
#' @return The result of `x + y`.
add_y <- function(x, y) x + y

# Call with two arguments: 1 + 3.
add_y(1, 3)
## [1] 4
# Call with one argument: 4 + 1.
add_one(4)
## [1] 5
#' Draw a 10-bin histogram of one column.
#'
#' @param data Data passed on to `ggplot()`.
#' @param variable_to_plot Name of the column for the x axis, given as a
#'   string (it is handed to `aes_string()`).
#' @return The plot object built by `ggplot() + geom_histogram()`.
create_plot <- function(data, variable_to_plot) {
  # NOTE(review): aes_string() is deprecated in newer ggplot2 releases; it is
  # kept here because this document was rendered with ggplot2 2.2.1.
  x_mapping <- aes_string(x = variable_to_plot)
  histogram_layer <- geom_histogram(bins = 10)
  ggplot(data, x_mapping) + histogram_layer
}

# Histogram of arr_time for all flights; the warning below reports the rows
# that were dropped because their value was not finite.
create_plot(flights, "arr_time")
## Warning: Removed 8713 rows containing non-finite values (stat_bin).

# List the entries of the current working directory and print them one by one.
my_files <- list.files()
for (file in my_files) {
  # Print the current entry (the loop variable), not the whole vector.
  print(file)
}
## [1] "flights_files"
## [1] "flights.nb.html"
## [1] "flights.Rmd"
## [1] "play-music.R"
## [1] "r-with-marc.Rproj"